<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2016-2017 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Installation using Apache Ambari</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-section-numbering.css" type="text/css" />

    <script type="text/javascript">
      // Page-level settings object, declared as a global before the script
      // files below are loaded.
      // NOTE(review): presumably read by doctools.js and the Sphinx search
      // scripts; confirm against those files before renaming or removing keys.
      var DOCUMENTATION_OPTIONS = {
        // NOTE(review): empty here, although every stylesheet and script on
        // this page is referenced through "../". Confirm that '' is the
        // intended root for a page that lives in a subdirectory.
        URL_ROOT:    '',
        // Matches the value of the "git_release" meta tag in this <head>.
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        // Extension used for generated pages (all links on this page end in .html).
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="Installation" href="index.html" />
    <link rel="next" title="Installation for MapR" href="mapr.html" />
    <link rel="prev" title="Installation on Amazon EMR using Bootstrap Actions" href="emr.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse" aria-label="Toggle navigation">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="https://docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="https://docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: admin-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-components.html"> CDAP Components</a></li>
<li class="toctree-l1"><a class="reference internal" href="../deployment-architectures.html"> Deployment Architectures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../hadoop-compatibility.html"> Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cdap-hadoop-compatibility.html"> CDAP and Hadoop Compatibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="../system-requirements.html"> System Requirements</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> Installation</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="cloudera.html">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="emr.html">Amazon EMR</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Apache Ambari</a></li>
<li class="toctree-l2"><a class="reference internal" href="mapr.html">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="azure-hdinsight.html">Microsoft Azure HDInsight</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html">Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="replication.html">Replication</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../incompatibilities.html"> Incompatibilities</a></li>
<li class="toctree-l1"><a class="reference internal" href="../upgrading/index.html"> Upgrading</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/cloudera.html">Cloudera Manager</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/ambari.html">Apache Ambari</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/mapr.html">MapR</a></li>
<li class="toctree-l2"><a class="reference internal" href="../upgrading/packages.html">Packages</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../security/index.html"> Security</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../security/perimeter-security.html">Perimeter Security</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/authorization.html">Authorization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/impersonation.html">Impersonation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/system-services.html">Enabling SSL for System Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/secure-storage.html">Secure Storage</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../operations/index.html"> Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../operations/logging.html"> Logging and Monitoring</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/metrics.html"> Metrics</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/operations-dashboard.html"> Dashboard and Reports</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/preferences.html"> Preferences and Runtime Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/scaling-instances.html"> Scaling Instances</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/resource-guarantees.html"> Resource Guarantees in YARN</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/tx-maintenance.html"> Transaction Service Maintenance</a></li>
<li class="toctree-l2"><a class="reference internal" href="../operations/cdap-ui.html"> CDAP UI</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../appendices/index.html"> Appendices</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-site.html"> Appendix: cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/cdap-security.html"> Appendix: cdap-security.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/minimal-cdap-site.html"> Appendix: Minimal cdap-site.xml</a></li>
<li class="toctree-l2"><a class="reference internal" href="../appendices/hbase-ddl-executor.html"> Appendix: HBaseDDLExecutor</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="installation-using-apache-ambari">
<span id="admin-installation-ambari"></span><h1>Installation using Apache Ambari<a class="headerlink" href="#installation-using-apache-ambari" title="Permalink to this headline">🔗</a></h1>
<div class="admin-manual-step-images container">
<a class="reference internal" href="#preparing-the-cluster"><img alt="Step 1: Preparing the Cluster" class="align-top" src="../_images/step-1.png" style="width: 166.0px; height: 90.0px;" /></a><a class="reference internal" href="#downloading-and-distributing-packages"><img alt="Step 2: Downloading and Distributing Packages" class="align-top" src="../_images/step-2.png" style="width: 230.0px; height: 90.0px;" /></a><a class="reference internal" href="#installing-cdap-services"><img alt="Step 3: Installing CDAP Services" class="align-top" src="../_images/step-3.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#starting-cdap-services"><img alt="Step 4: Starting CDAP Services" class="align-top" src="../_images/step-4.png" style="width: 187.0px; height: 90.0px;" /></a><a class="reference internal" href="#verification"><img alt="Step 5: Verification" class="align-top" src="../_images/step-5.png" style="width: 165.0px; height: 90.0px;" /></a></div>
<p class="rubric">Notes</p>
<ul class="simple">
<li><a class="reference external" href="https://ambari.apache.org/">Apache Ambari</a> can only be used to add CDAP to an <strong>existing</strong>
Hadoop cluster, one that already has the required services (Hadoop: HDFS, YARN, HBase,
ZooKeeper, and—optionally—Hive and Spark) installed.</li>
<li>Ambari is for setting up HDP (Hortonworks Data Platform) on bare clusters; it can’t be
used for clusters with HDP already installed, where the original installation was
<strong>not</strong> with Ambari.</li>
<li>A number of features are currently planned to be added, including:<ul>
<li>select <a class="reference external" href="https://issues.cask.co/browse/CDAP-4108">CDAP metrics</a> and</li>
<li>a full <a class="reference external" href="https://issues.cask.co/browse/CDAP-4105">smoke test of CDAP functionality</a> after installation.</li>
</ul>
</li>
<li>If you are installing CDAP with the intention of using <em>replication,</em> see these
instructions on <a class="reference internal" href="replication.html#installation-replication"><span class="std std-ref">CDAP Replication</span></a> <em>before</em> installing or starting CDAP.</li>
</ul>
<div class="section" id="preparing-the-cluster">
<h2>Preparing the Cluster<a class="headerlink" href="#preparing-the-cluster" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="hadoop-configuration">
<h3>Hadoop Configuration<a class="headerlink" href="#hadoop-configuration" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic">
<li><p class="first">ZooKeeper’s <code class="docutils literal notranslate"><span class="pre">maxClientCnxns</span></code> must be raised from its default.  We suggest setting it to zero
(0: unlimited connections). As each YARN container launched by CDAP makes a connection to ZooKeeper,
the number of connections required is a function of usage.</p>
</li>
<li><p class="first">Ensure that YARN has sufficient memory capacity by lowering the default minimum container
size (controlled by the property <code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>). Lack of
YARN memory capacity is the leading cause of apparent failures that we see reported.
We recommend starting with these settings:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code>: 43200 <em>(see note below)</em></li>
<li><code class="docutils literal notranslate"><span class="pre">yarn.scheduler.minimum-allocation-mb</span></code>: 512 MB</li>
</ul>
<p>The value we recommend for <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.delete.debug-delay-sec</span></code> (<code class="docutils literal notranslate"><span class="pre">43200</span></code> or 12
hours) is what we use internally at Cask for testing as that provides adequate time to
capture the logs of any failures. However, you should use an appropriate non-zero value
specific to your environment. A large value can be expensive from a storage perspective.</p>
<p>Please ensure your <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.cpu-vcores</span></code> and
<code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.resource.memory-mb</span></code> settings are set sufficiently to run CDAP,
as described in the <a class="reference internal" href="../system-requirements.html#admin-manual-memory-core-requirements"><span class="std std-ref">CDAP Memory and Core Requirements</span></a>.</p>
</li>
</ol>
<p>You can make these changes during the configuration of your cluster <a class="reference external" href="http://docs.hortonworks.com/HDPDocuments/Ambari-2.4.2.0/bk_ambari-installation/content/customize_services.html">using Ambari</a>.</p>
</div>
<div class="section" id="hdfs-permissions">
<span id="ambari-hdfs-permissions"></span><h3>HDFS Permissions<a class="headerlink" href="#hdfs-permissions" title="Permalink to this headline">🔗</a></h3>
<p>Ensure YARN is configured properly to run MapReduce programs.  Often, this includes
ensuring that the HDFS <code class="docutils literal notranslate"><span class="pre">/user/yarn</span></code> and <code class="docutils literal notranslate"><span class="pre">/user/cdap</span></code> directories exist with proper
permissions:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> su hdfs
<span class="gp">$</span> hadoop fs -mkdir -p /user/yarn &amp;&amp; hadoop fs -chown yarn:yarn /user/yarn
<span class="gp">$</span> hadoop fs -mkdir -p /user/cdap &amp;&amp; hadoop fs -chown cdap:cdap /user/cdap
</pre>
</div>
</div>
</div>
<div class="section" id="downloading-and-distributing-packages">
<h2>Downloading and Distributing Packages<a class="headerlink" href="#downloading-and-distributing-packages" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="downloading-cdap-ambari-service">
<h3>Downloading CDAP Ambari Service<a class="headerlink" href="#downloading-cdap-ambari-service" title="Permalink to this headline">🔗</a></h3>
<p>To install CDAP on a cluster managed by Ambari, we have available packages for
RHEL-compatible and Ubuntu systems, which you can install onto your Ambari management server.
These packages add CDAP to the list of available services which Ambari can install.</p>
<p>To install the <code class="docutils literal notranslate"><span class="pre">cdap-ambari-service</span></code> package, first add the appropriate CDAP repository
to your system’s package manager by following the steps below. These steps will install a
Cask repository on your Ambari server.</p>
<p>The <strong>repository version</strong> (shown in the commands below as <code class="docutils literal notranslate"><span class="pre">cdap/6.1</span></code>)
must match the <strong>CDAP series</strong> which you’d like installed on your cluster. To install the
<strong>latest</strong> version of the <em>CDAP 4.1 series,</em> you would install the <em>CDAP 4.1 repository.</em></p>
<p>Replace—in the commands that follow on this page—all references to
<code class="docutils literal notranslate"><span class="pre">cdap/6.1</span></code> with the CDAP Repository corresponding to the version
that you would like to use (such as <code class="docutils literal notranslate"><span class="pre">cdap/4.0</span></code> for CDAP 4.0.x):</p>
<table border="1" class="docutils" id="ambari-compatibility-matrix">
<colgroup>
<col width="55%" />
<col width="45%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head" colspan="2">Supported Hortonworks Data Platform (HDP) Distributions</th>
</tr>
<tr class="row-even"><th class="head">CDAP Series or Release</th>
<th class="head">Hadoop Distributions</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-odd"><td>CDAP 4.1.1, 4.2.x</td>
<td>HDP 2.0 through HDP 2.6</td>
</tr>
<tr class="row-even"><td>CDAP 4.1.0</td>
<td>HDP 2.0 through HDP 2.5</td>
</tr>
<tr class="row-odd"><td>CDAP 4.0.x</td>
<td>HDP 2.0 through HDP 2.5</td>
</tr>
<tr class="row-even"><td>CDAP 3.6.x</td>
<td>HDP 2.0 through HDP 2.4</td>
</tr>
<tr class="row-odd"><td>CDAP 3.5.x</td>
<td>HDP 2.0 through HDP 2.4</td>
</tr>
<tr class="row-even"><td>CDAP 3.4.x</td>
<td>HDP 2.0 through HDP 2.4</td>
</tr>
<tr class="row-odd"><td>CDAP 3.3.x</td>
<td>HDP 2.0 through HDP 2.3</td>
</tr>
<tr class="row-even"><td>CDAP 3.2.x</td>
<td>HDP 2.0 through HDP 2.3</td>
</tr>
<tr class="row-odd"><td>CDAP 3.1.x</td>
<td>HDP 2.0 through HDP 2.2</td>
</tr>
<tr class="row-even"><td>CDAP 3.0.x</td>
<td>HDP 2.0 and HDP 2.1</td>
</tr>
</tbody>
</table>
<p id="ambari-compatibility-matrix-end"><strong>Notes:</strong></p>
<ul class="simple">
<li>The CDAP Ambari service has been tested on Ambari Server 2.3 through 2.5, as
supplied from Hortonworks.</li>
<li>To install a version lower than the highest current version (such as <em>CDAP 4.1.0</em> when
<em>4.1.1</em> is available), you will need to downgrade your repo after installing it.</li>
</ul>
<div class="section" id="on-rpm-using-yum">
<h4>On RPM using Yum<a class="headerlink" href="#on-rpm-using-yum" title="Permalink to this headline">🔗</a></h4>
<p>Download the Cask Yum repo definition file:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo curl -o /etc/yum.repos.d/cask.repo http://repository.cask.co/centos/6/x86_64/cdap/6.1/cask.repo
</pre>
</div>
<p>This will create the file <code class="docutils literal notranslate"><span class="pre">/etc/yum.repos.d/cask.repo</span></code> with:</p>
<pre class="literal-block">
[cask]
name=Cask Packages
baseurl=https://repository.cask.co/centos/6/x86_64/cdap/6.1
enabled=1
gpgcheck=1
</pre>
<p>Add the Cask Public GPG Key to your repository:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo rpm --import https://repository.cask.co/centos/6/x86_64/cdap/6.1/pubkey.gpg
</pre>
</div>
<p>Update your Yum cache:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo yum makecache
</pre></div>
</div>
</div>
<div class="section" id="on-debian-using-apt">
<h4>On Debian using APT<a class="headerlink" href="#on-debian-using-apt" title="Permalink to this headline">🔗</a></h4>
<p>Download the Cask APT repo definition file:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> sudo curl -o /etc/apt/sources.list.d/cask.list http://repository.cask.co/ubuntu/precise/amd64/cdap/6.1/cask.list
</pre>
</div>
<p>This will create the file <code class="docutils literal notranslate"><span class="pre">/etc/apt/sources.list.d/cask.list</span></code> with:</p>
<pre class="literal-block">
deb [ arch=amd64 ] http://repository.cask.co/ubuntu/precise/amd64/cdap/6.1 precise cdap
</pre>
<p>Add the Cask Public GPG Key to your repository:</p>
<div class="highlight container">
<pre class="literal-block">
<span class="gp">$</span> curl -s https://repository.cask.co/ubuntu/precise/amd64/cdap/6.1/pubkey.gpg | sudo apt-key add -
</pre>
</div>
<p>Update your APT cache:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo apt-get update
</pre></div>
</div>
</div>
</div>
<div class="section" id="installing-cdap-ambari-service">
<h3>Installing CDAP Ambari Service<a class="headerlink" href="#installing-cdap-ambari-service" title="Permalink to this headline">🔗</a></h3>
<p>Now, install the <code class="docutils literal notranslate"><span class="pre">cdap-ambari-service</span></code> package from the repo you specified above:</p>
<div class="section" id="installing-the-cdap-service-via-yum">
<h4>Installing the CDAP Service via YUM<a class="headerlink" href="#installing-the-cdap-service-via-yum" title="Permalink to this headline">🔗</a></h4>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo yum install -y cdap-ambari-service
<span class="gp">$</span> sudo ambari-server restart
</pre></div>
</div>
</div>
<div class="section" id="installing-the-cdap-service-via-apt">
<h4>Installing the CDAP Service via APT<a class="headerlink" href="#installing-the-cdap-service-via-apt" title="Permalink to this headline">🔗</a></h4>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> sudo apt-get install -y cdap-ambari-service
<span class="gp">$</span> sudo ambari-server restart
</pre></div>
</div>
</div>
</div>
</div>
<div class="section" id="installing-cdap-services">
<h2>Installing CDAP Services<a class="headerlink" href="#installing-cdap-services" title="Permalink to this headline">🔗</a></h2>
<p>You can now install CDAP using the Ambari Service Wizard.</p>
<div class="section" id="start-the-ambari-service-wizard">
<h3>Start the Ambari Service Wizard<a class="headerlink" href="#start-the-ambari-service-wizard" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic">
<li><p class="first">In the Ambari UI (the Ambari Dashboard), start the <strong>Add Service Wizard</strong>.</p>
<div class="figure align-center" id="id1" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss01-add-service.png"><img alt="Ambari Dashboard: starting the Add Service Wizard" class="bordered-image" src="../_images/ss01-add-service.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Starting the <em>Add Service</em> Wizard</span></p>
</div>
</li>
<li><p class="first">Select CDAP from the list and click <em>Next</em>. If there are core dependencies which are not
currently installed on the cluster, Ambari will prompt you to install them.</p>
<div class="figure align-center" id="id2" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss02-select-cdap.png"><img alt="Ambari Dashboard: selecting CDAP in the Add Service Wizard" class="bordered-image" src="../_images/ss02-select-cdap.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Selecting <em>CDAP</em></span></p>
</div>
</li>
</ol>
</div>
<div class="section" id="assign-cdap-services-to-hosts">
<h3>Assign CDAP Services to Hosts<a class="headerlink" href="#assign-cdap-services-to-hosts" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic" start="3">
<li><p class="first">Next, assign CDAP services to hosts.</p>
<p>CDAP consists of five daemons:</p>
<ol class="arabic simple">
<li><strong>Master:</strong> Coordinator service which launches CDAP system services into YARN</li>
<li><strong>Router:</strong> Serves HTTP endpoints for CDAP applications and REST API</li>
<li><strong>Auth Server:</strong> For managing authentication tokens on CDAP clusters with perimeter security enabled</li>
<li><strong>Kafka Server:</strong> For transporting CDAP metrics and CDAP system service log data</li>
<li><strong>UI:</strong> Web interface to CDAP and <a class="reference external" href="../../../developer-manual/pipelines/studio.html#cdap-studio" title="(in Cask Data Application Platform v6.1.1)"><span class="xref std std-ref">CDAP Studio</span></a></li>
</ol>
<div class="figure align-center" id="id3" style="width: 100%">
<a class="bordered-image-top-margin reference internal image-reference" href="../_images/ss03-assign-masters.png"><img alt="Ambari Dashboard: assigning CDAP master services to hosts" class="bordered-image-top-margin" src="../_images/ss03-assign-masters.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Assigning Masters</span></p>
</div>
<p>We recommend you install all CDAP services onto an edge node (or the <em>NameNode</em>, for
smaller clusters) such as in our example above. After assigning the master hosts, click
<em>Next</em>.</p>
</li>
<li><p class="first">Select hosts for the CDAP CLI client. This should be installed on every edge node on
the cluster or, for smaller clusters, on the same node as the CDAP services.</p>
<div class="figure align-center" id="id4" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss04-choose-clients.png"><img alt="Ambari Dashboard: selecting hosts for the CDAP CLI client" class="bordered-image" src="../_images/ss04-choose-clients.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Selecting hosts for <em>CDAP</em></span></p>
</div>
</li>
<li><p class="first">Click <em>Next</em> to customize the CDAP installation.</p>
</li>
</ol>
</div>
<div class="section" id="customize-cdap">
<h3>Customize CDAP<a class="headerlink" href="#customize-cdap" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic" start="6">
<li><p class="first">On the <strong>Customize Services</strong> screen, you can configure both CDAP features and the
environment settings for CDAP and the CDAP services which run on the edge nodes. At the bottom of
the <em>Settings</em> tab are settings for common CDAP features and Java services.</p>
<div class="figure align-center" id="id5" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss05-config-cdap.png"><img alt="Ambari Dashboard: Customize Services screen with CDAP features and Java services settings" class="bordered-image" src="../_images/ss05-config-cdap.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Customizing Services, CDAP Features and Java Services</span></p>
</div>
</li>
<li><p class="first">On the <strong>Customize Services</strong> screen, click the <em>Advanced</em> tab to bring up the complete CDAP configuration.
Under <em>Advanced cdap-env</em>, you can configure environment settings such as heap sizes
and the directories used to store logs and PIDs for the CDAP services which run on the edge nodes.</p>
<div class="figure align-center" id="id6" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss06-config-cdap-env.png"><img alt="Ambari Dashboard: Customize Services screen, Advanced tab" class="bordered-image" src="../_images/ss06-config-cdap-env.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Customizing Services 2</span></p>
</div>
<p>Under <em>Advanced cdap-site</em>, you can configure all options for the operation and running
of CDAP and CDAP applications.</p>
<p><strong>Additional CDAP configuration properties</strong>, not shown in the web interface, can be
added using Ambari’s advanced custom properties at the end of the page. Documentation
of the available CDAP properties is in the <a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-site-xml"><span class="std std-ref">Appendix: cdap-site.xml, cdap-default.xml</span></a>.</p>
<p>For a <strong>complete explanation of these options,</strong> refer to the <a class="reference internal" href="../appendices/cdap-site.html#appendix-cdap-site-xml"><span class="std std-ref">CDAP documentation
of cdap-site.xml</span></a>.</p>
<p><strong>Additional environment variables</strong> can be set, as required, using Ambari’s
<em>Configs &gt; Advanced &gt; Advanced cdap-env</em>.</p>
<p>When finished with configuration changes, click <em>Next</em>.</p>
</li>
</ol>
</div>
</div>
<div class="section" id="starting-cdap-services">
<h2>Starting CDAP Services<a class="headerlink" href="#starting-cdap-services" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="deploying-cdap">
<h3>Deploying CDAP<a class="headerlink" href="#deploying-cdap" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic" start="8">
<li><p class="first">Review the desired service layout and click <em>Deploy</em> to begin the actual deployment of CDAP.</p>
<div class="figure align-center" id="id7" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss07-review-deploy.png"><img alt="Ambari Dashboard: review of the service layout before deployment" class="bordered-image" src="../_images/ss07-review-deploy.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Summary of Services</span></p>
</div>
</li>
<li><p class="first">Ambari will install CDAP and start the services.</p>
<div class="figure align-center" id="id8" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss08-install-start-test.png"><img alt="../_images/ss08-install-start-test.png" class="bordered-image" src="../_images/ss08-install-start-test.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Install, Start, and Test</span></p>
</div>
</li>
<li><p class="first">After the services are installed and started, you will click <em>Next</em> to get to the
Summary screen.</p>
</li>
<li><p class="first">This screen shows a summary of the changes that were made to the cluster. No services
should need to be restarted following this operation.</p>
<div class="figure align-center" id="id9" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss09-post-install-summary.png"><img alt="../_images/ss09-post-install-summary.png" class="bordered-image" src="../_images/ss09-post-install-summary.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Summary</span></p>
</div>
</li>
<li><p class="first">Click <em>Complete</em> to complete the CDAP installation.</p>
</li>
</ol>
</div>
<div class="section" id="cdap-started">
<h3>CDAP Started<a class="headerlink" href="#cdap-started" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic simple" start="13">
<li>You should now see <strong>CDAP</strong> listed on the main summary screen for your cluster.</li>
</ol>
<blockquote>
<div><div class="figure align-center" id="id10" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss10-main-screen.png"><img alt="../_images/ss10-main-screen.png" class="bordered-image" src="../_images/ss10-main-screen.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> Selecting <em>CDAP</em></span></p>
</div>
</div></blockquote>
</div>
</div>
<div class="section" id="verification">
<span id="ambari-verification"></span><h2>Verification<a class="headerlink" href="#verification" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="service-checks-in-apache-ambari">
<h3>Service Checks in Apache Ambari<a class="headerlink" href="#service-checks-in-apache-ambari" title="Permalink to this headline">🔗</a></h3>
<ol class="arabic simple" start="14">
<li>Selecting <em>CDAP</em> from the left sidebar, or choosing it from the Services drop-down menu, will take
you to the CDAP service screen.</li>
</ol>
<blockquote>
<div><div class="figure align-center" id="id11" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ss11-cdap-screen.png"><img alt="../_images/ss11-cdap-screen.png" class="bordered-image" src="../_images/ss11-cdap-screen.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>Ambari Dashboard:</strong> <em>CDAP</em> Service Screen</span></p>
</div>
</div></blockquote>
<p>CDAP is now running on your cluster, managed by Ambari. You can log in to the CDAP UI at
the address of the node running the CDAP UI service at port 11011. The drop-down <em>Quick
Links</em> menu has a menu item directly to the CDAP UI.</p>
</div>
<div class="section" id="cdap-smoke-test">
<h3>CDAP Smoke Test<a class="headerlink" href="#cdap-smoke-test" title="Permalink to this headline">🔗</a></h3>
<p>The CDAP UI may initially show errors while all of the CDAP YARN containers are
starting up. Allow for up to a few minutes for this.</p>
<p>The <em>Administration</em> page of the CDAP UI shows the status of the CDAP services.
It can be reached at <code class="docutils literal notranslate"><span class="pre">http://&lt;cdap-host&gt;:11011/cdap/administration</span></code>, substituting for
<code class="docutils literal notranslate"><span class="pre">&lt;cdap-host&gt;</span></code> the host name or IP address of the CDAP server:</p>
<div class="figure align-center" id="id12" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/console-distributed.png"><img alt="../_images/console-distributed.png" class="bordered-image" src="../_images/console-distributed.png" style="width: 800px;" /></a>
<p class="caption"><span class="caption-text"><strong>CDAP UI:</strong> Showing started-up, <em>Administration</em> page.</span></p>
</div>
</div>
</div>
<div class="section" id="advanced-topics">
<span id="ambari-installation-advanced-topics"></span><h2>Advanced Topics<a class="headerlink" href="#advanced-topics" title="Permalink to this headline">🔗</a></h2>
<ul class="simple">
<li><a class="reference internal" href="#ambari-configuration-security"><span class="std std-ref">Enabling Security</span></a></li>
<li><a class="reference internal" href="#ambari-configuration-enabling-kerberos"><span class="std std-ref">Enabling Kerberos</span></a></li>
<li><a class="reference internal" href="#ambari-configuration-highly-available"><span class="std std-ref">Enabling CDAP High Availability</span></a></li>
<li><a class="reference internal" href="#ambari-configuration-enabling-hive-execution-engines"><span class="std std-ref">Enabling Hive Execution Engines</span></a></li>
<li><a class="reference internal" href="#ambari-configuration-enabling-spark2"><span class="std std-ref">Enabling Spark2</span></a></li>
</ul>
<span class="target" id="ambari-configuration-security"></span><div class="section" id="enabling-security">
<h3>Enabling Security<a class="headerlink" href="#enabling-security" title="Permalink to this headline">🔗</a></h3>
<p>Cask Data Application Platform (CDAP) supports securing clusters using perimeter security, authorization,
impersonation and secure storage.</p>
<p>Network (or cluster) perimeter security limits outside access, providing a first level of
security. However, perimeter security itself does not provide the safeguards of authentication,
authorization and service request management that a secure Hadoop cluster provides.</p>
<p>Authorization provides a way of enforcing access control on CDAP entities.</p>
<p>Impersonation ensures that programs inside CDAP are run as configured users at the namespace level. When enabled, it
guarantees that all actions on datasets, streams and other resources happen as the configured user.</p>
<p>We recommend that in order for CDAP to be secure, CDAP security should always be used in conjunction with
<a class="reference external" href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html">secure Hadoop clusters</a>.
In cases where secure Hadoop is not or cannot be used, the cluster is inherently insecure and any applications
running on the cluster are effectively “trusted”. Although there is still value in having perimeter security,
authorization enforcement and secure storage in that situation, whenever possible a secure Hadoop
cluster should be employed with CDAP security.</p>
<p>For instructions on enabling CDAP Security, see <a class="reference internal" href="../security/index.html#admin-security"><span class="std std-ref">CDAP Security</span></a>.</p>
<p><a class="reference internal" href="../security/index.html#admin-security"><span class="std std-ref">CDAP Security</span></a> is configured by setting the appropriate
settings under Ambari for your environment.</p>
</div>
<div class="section" id="enabling-kerberos">
<span id="ambari-configuration-enabling-kerberos"></span><h3>Enabling Kerberos<a class="headerlink" href="#enabling-kerberos" title="Permalink to this headline">🔗</a></h3>
<p>Kerberos support in CDAP is automatically enabled when enabling Kerberos security on your
cluster via Ambari. Consult the appropriate Ambari documentation for instructions on enabling
Kerberos support for your cluster.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user must be able to launch YARN containers, which can be accomplished by
adjusting the YARN <code class="docutils literal notranslate"><span class="pre">min.user.id</span></code> (to 500) to include the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user. (As Ambari does
not have a mechanism for setting the YARN <code class="docutils literal notranslate"><span class="pre">allowed.system.users</span></code>—the preferred
method of enabling the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user as it is more precise and limited—the setting
of <code class="docutils literal notranslate"><span class="pre">min.user.id</span></code> needs to be used instead.)</p>
<ol class="upperalpha">
<li><p class="first">If you are <strong>adding CDAP</strong> to an existing Kerberos cluster, in order to configure <strong>CDAP for
Kerberos authentication</strong>:</p>
<ol class="arabic">
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code> is shown in the commands that follow as <code class="docutils literal notranslate"><span class="pre">cdap</span></code>;
however, you are free to use a different appropriate name.</p>
</li>
<li><p class="first">When running on a secure HBase cluster, as the <code class="docutils literal notranslate"><span class="pre">hbase</span></code> user, issue the command:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> <span class="nb">echo</span> <span class="s2">&quot;grant &#39;cdap&#39;, &#39;RWCA&#39;&quot;</span> <span class="p">|</span> hbase shell
</pre></div>
</div>
</li>
<li><p class="first">In order to configure <strong>CDAP Explore Service for secure Hadoop:</strong></p>
<ol class="lowerroman">
<li><p class="first">To allow CDAP to act as a Hive client, it must be given <code class="docutils literal notranslate"><span class="pre">proxyuser</span></code> permissions and allowed
from all hosts. For example: set the following properties in the configuration file <code class="docutils literal notranslate"><span class="pre">core-site.xml</span></code>,
where <code class="docutils literal notranslate"><span class="pre">cdap</span></code> is a system group of which the <code class="docutils literal notranslate"><span class="pre">cdap</span></code> user is a member:</p>
<div class="highlight-xml notranslate"><div class="highlight"><pre><span></span><span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hadoop.proxyuser.hive.groups<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>cdap,hadoop,hive<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
<span class="nt">&lt;property&gt;</span>
  <span class="nt">&lt;name&gt;</span>hadoop.proxyuser.hive.hosts<span class="nt">&lt;/name&gt;</span>
  <span class="nt">&lt;value&gt;</span>*<span class="nt">&lt;/value&gt;</span>
<span class="nt">&lt;/property&gt;</span>
</pre></div>
</div>
</li>
<li><p class="first">To execute Hive queries on a secure cluster, the cluster must be running the MapReduce <code class="docutils literal notranslate"><span class="pre">JobHistoryServer</span></code>
service. Consult your distribution documentation on the proper configuration of this service.</p>
</li>
<li><p class="first">To execute Hive queries on a secure cluster using the CDAP Explore Service, the Hive MetaStore service
must be configured for Kerberos authentication. Consult your distribution documentation on the proper
configuration of the Hive MetaStore service.</p>
</li>
</ol>
<p>With all these properties set, the CDAP Explore Service will run on secure Hadoop clusters.</p>
</li>
</ol>
</li>
<li><p class="first">If you are <strong>adding Kerberos</strong> to an existing cluster, in order to configure <strong>CDAP for
Kerberos authentication</strong>:</p>
<ol class="arabic">
<li><p class="first">The <code class="docutils literal notranslate"><span class="pre">/cdap</span></code> directory needs to be owned by the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code>; you can set
that by running the following command as the <code class="docutils literal notranslate"><span class="pre">hdfs</span></code> user (change the ownership in the
command from <code class="docutils literal notranslate"><span class="pre">cdap</span></code> to whatever is the <code class="docutils literal notranslate"><span class="pre">&lt;cdap-principal&gt;</span></code>):</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> su hdfs <span class="o">&amp;&amp;</span> hadoop fs -mkdir -p /cdap <span class="o">&amp;&amp;</span> hadoop fs -chown cdap /cdap
</pre></div>
</div>
</li>
<li><p class="first">When converting an existing CDAP cluster to being Kerberos-enabled, you may
run into YARN usercache directory permission problems. A non-Kerberos cluster with
default settings will run CDAP containers as the user <code class="docutils literal notranslate"><span class="pre">yarn</span></code>. A Kerberos cluster will
run them as the user <code class="docutils literal notranslate"><span class="pre">cdap</span></code>. When converting, the usercache directory that YARN
creates will already exist and be owned by a different user. On all datanodes, run this
command, substituting in the correct value of the YARN parameter <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> rm -rf &lt;YARN.NODEMANAGER.LOCAL-DIRS&gt;/usercache/cdap
</pre></div>
</div>
<p>(As <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code> can be a comma-separated list of directories, you may
need to run this command multiple times, once for each entry.)</p>
<p>If, for example, the setting for <code class="docutils literal notranslate"><span class="pre">yarn.nodemanager.local-dirs</span></code> is <code class="docutils literal notranslate"><span class="pre">/yarn/nm</span></code>, you would use:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$</span> rm -rf /yarn/nm/usercache/cdap
</pre></div>
</div>
<p>Restart CDAP after removing the usercache(s).</p>
</li>
</ol>
</li>
</ol>
</div>
<div class="section" id="enabling-cdap-ha">
<span id="ambari-configuration-highly-available"></span><h3>Enabling CDAP HA<a class="headerlink" href="#enabling-cdap-ha" title="Permalink to this headline">🔗</a></h3>
<p>In addition to having a <a class="reference internal" href="../deployment-architectures.html#admin-manual-install-deployment-architectures-ha"><span class="std std-ref">cluster architecture</span></a>
that supports HA (high availability), these additional configuration steps need to be followed and completed:</p>
<div class="section" id="cdap-components">
<h4>CDAP Components<a class="headerlink" href="#cdap-components" title="Permalink to this headline">🔗</a></h4>
<p>For each of the CDAP components listed below (Master, Router, Kafka, UI, Authentication Server), these
comments apply:</p>
<ul class="simple">
<li>Sync the configuration files (such as <code class="docutils literal notranslate"><span class="pre">cdap-site.xml</span></code> and <code class="docutils literal notranslate"><span class="pre">cdap-security.xml</span></code>) on all the nodes.</li>
<li>While the default <em>bind.address</em> settings (<code class="docutils literal notranslate"><span class="pre">0.0.0.0</span></code>, used for <code class="docutils literal notranslate"><span class="pre">app.bind.address</span></code>,
<code class="docutils literal notranslate"><span class="pre">data.tx.bind.address</span></code>, <code class="docutils literal notranslate"><span class="pre">router.bind.address</span></code>, and so on) can be synced across hosts,
if you customize them to a particular IP address, they will—as a result—be
different on different hosts. This can be controlled by the settings for an individual <em>Role Instance</em>.</li>
</ul>
</div>
<div class="section" id="cdap-master">
<h4>CDAP Master<a class="headerlink" href="#cdap-master" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Master service primarily performs coordination tasks and can be scaled for redundancy. The
instances coordinate amongst themselves, electing one as a leader at all times.</p>
<ul class="simple">
<li>Using the Ambari UI, add additional hosts for the <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Master</span>
<span class="pre">Service</span></code> to additional machines.</li>
</ul>
</div>
<div class="section" id="cdap-router">
<h4>CDAP Router<a class="headerlink" href="#cdap-router" title="Permalink to this headline">🔗</a></h4>
<p>The CDAP Router service is a stateless API endpoint for CDAP, and simply routes requests to the
appropriate service. It can be scaled horizontally for performance. A load balancer, if
desired, can be placed in front of the nodes running the service.</p>
<ul class="simple">
<li>Using the Ambari UI, add additional hosts for the <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Router</span> <span class="pre">Service</span></code> to additional machines.</li>
<li>Start each <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Router</span> <span class="pre">Service</span></code> role.</li>
</ul>
</div>
<div class="section" id="cdap-kafka">
<h4>CDAP Kafka<a class="headerlink" href="#cdap-kafka" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Ambari UI, add additional hosts for the <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Kafka</span> <span class="pre">Service</span></code>
to additional machines.</li>
<li>Two properties govern the Kafka setting in the cluster:<ul>
<li>The <strong>list of Kafka seed brokers</strong> is generated automatically, but the
replication factor (<code class="docutils literal notranslate"><span class="pre">kafka.server.default.replication.factor</span></code>) is not set
automatically. Instead, it needs to be set manually.</li>
<li>The <strong>replication factor</strong> is used to replicate Kafka messages across
multiple machines to prevent data loss in the event of a hardware
failure.</li>
</ul>
</li>
<li>The recommended setting is to run <strong>at least two</strong> Kafka brokers with a <strong>minimum replication
factor of two</strong>; set this property to the maximum number of tolerated machine failures
plus one (assuming you have that number of machines). For example, if you were running
five Kafka brokers, and would tolerate two of those failing, you would set the
replication factor to three. The number of Kafka brokers listed should always be equal to
or greater than the replication factor.</li>
</ul>
</div>
<div class="section" id="cdap-ui">
<h4>CDAP UI<a class="headerlink" href="#cdap-ui" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Ambari UI, add additional hosts for the <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">UI</span> <span class="pre">Service</span></code>
to additional machines.</li>
</ul>
</div>
<div class="section" id="cdap-authentication-server">
<h4>CDAP Authentication Server<a class="headerlink" href="#cdap-authentication-server" title="Permalink to this headline">🔗</a></h4>
<ul class="simple">
<li>Using the Ambari UI, add additional hosts for the <code class="docutils literal notranslate"><span class="pre">CDAP</span> <span class="pre">Security</span> <span class="pre">Auth</span>
<span class="pre">Service</span></code> (the CDAP Authentication Server) to additional machines.</li>
<li>Note that when an unauthenticated request is made in a secure HA setup, a list of all
running authentication endpoints will be returned in the body of the response.</li>
</ul>
</div>
</div>
<div class="section" id="hive-execution-engines">
<span id="ambari-configuration-enabling-hive-execution-engines"></span><h3>Hive Execution Engines<a class="headerlink" href="#hive-execution-engines" title="Permalink to this headline">🔗</a></h3>
<p>CDAP Explore has support for additional execution engines such as
<a class="reference external" href="http://spark.apache.org/">Apache Spark</a> and
<a class="reference external" href="http://tez.apache.org/">Apache Tez</a>. Details on specifying these engines and
configuring CDAP are in the Developer Manual section on Data Exploration,
<a class="reference external" href="../../../developer-manual/data-exploration/hive-execution-engines.html#hive-ee" title="(in Cask Data Application Platform v6.1.1)"><span class="xref std std-ref">Hive Execution Engines</span></a>.</p>
</div>
<div class="section" id="enabling-spark2">
<span id="ambari-configuration-enabling-spark2"></span><h3>Enabling Spark2<a class="headerlink" href="#enabling-spark2" title="Permalink to this headline">🔗</a></h3>
<p>In order to use Spark2, you must first install Spark2 on your cluster. If both Spark1
and Spark2 are installed, you must modify cdap-env to set SPARK_MAJOR_VERSION and SPARK_HOME:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">export SPARK_MAJOR_VERSION=2</span>
<span class="go">export SPARK_HOME=/usr/hdp/{{hdp_version}}/spark2</span>
</pre></div>
</div>
<p>When Spark2 is in use, Spark1 programs cannot be run. Similarly, when Spark1 is in use,
Spark2 programs cannot be run.</p>
<p>When CDAP starts up, it detects the Spark version and uploads the corresponding pipeline
system artifact. If you have already started CDAP with Spark1,
you will also need to delete the pipeline system artifacts, then reload them in order
to use the Spark2 versions. After CDAP has been restarted with Spark2, use the RESTful API:</p>
<pre class="literal-block">
<span class="gp">$</span> DELETE /v3/namespaces/system/artifacts/cdap-data-pipeline/versions/6.1.1
<span class="gp">$</span> DELETE /v3/namespaces/system/artifacts/cdap-data-streams/versions/6.1.1
<span class="gp">$</span> POST /v3/namespaces/system/artifacts
</pre>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Installation on Amazon EMR using Bootstrap Actions" href="emr.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Installation for MapR" href="mapr.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>